$title  demand.GMS


* ###################################

* Per capita income, consumption patterns, and CO2 emissions

* Journal of the Association of Environmental and Resource Economists

* Justin Caron and Thibault Fally

* April 2021

*###################################


* ESTIMATE DEMAND SYSTEM PARAMETERS - NH CES DEMAND SYSTEM - QUADRATIC VERSION

$set SLASH \
$if %system.filesys% == UNIX $set SLASH /


$if not set datadir $set datadir "data%SLASH%"
$setglobal datadir %datadir%


* ---------------------------------------------------------------------------------------------
* OPTIONS :


* SPECIFICATION ?
* choices : tc, theta4
$if not set spec $set spec theta4

* subset of regions to include
$if not set regsubset $set regsubset rall

* MINIMIZE ERRORS ON LOGS OR CONSUMPTION SHARES ?
* choices : log, consshare, logweighted
$ if not set objective $set objective logweighted

* load gravity data from stata or gams ?
* choices : stata, gams
$ if not set gravitydata $set gravitydata gams

* skip reporting ?
* choices : yes, no
$ if not set skipreporting $set skipreporting yes


* BOOSTRAP ?
$if not set boot $set boot no
* set nb of boostrap iterations:
$if not set itr $set itr 4


$if not set ds $set ds gtap8gas
$include loaddata_gtap8.gms


* ------------------------------------
* IMPORT  GRAVITY ESTIMATES

parameter       coeffs stores all sector-specific coefficients
                phiest
                tcostest
                logphiest
                IM
                EX
                cst;


$gdxin estimates\gravityestimates_%ds%.gdx
$load  coeffs phiest im ex cst tcostest

parameter esttheta;
esttheta(i) = 0;

display coeffs;


* --  DEFINE SECTORS WHICH ARE MOSTLY INTERMEDIATES

parameter sharefd % FD in vdm (dom output);
set intermediates(i);

sharefd(i) = (sum((r,g), vdfm(i,g,r) + vifm(i,g,r)) - sum((j,r), vdfm(i,j,r) + vifm(i,j,r)))/ sum((r,g), vdfm(i,g,r) + vifm(i,g,r));

* defined as "intermediate" if  less than 10 % of production goes to final demand
intermediates(i)$(sharefd(i)<0.1) = yes;
display sharefd, intermediates;

* -- DEFINE SERVICE AND TRADABLE SECTORS SECTORS

*set     serv(i) service sectors / CMN, DWE,ISR,OBS, OFI,OSG,ROS,WTR,TRD,CNS, OTP, ATP, WTP, GDT, ELY/
set     serv(i) service sectors /osg/
        tradables(i) the tradable sectors;

tradables(i) = yes;
tradables(serv) = no;
display tradables;


* -----------------------------------------------------------
* NLLS Demand estimations
* -----------------------------------------------------------
* the only exogenous parameters needed are: x(i,r), per capita expenditure, w(n) = wage = PCI


parameter       x(i,r)          per capita expenditure
                w(r)            wage = PCI
                indexp(i)       industry total expenditure,
                logphi,
                phi
                bhat;

set i_(i), r_(r),g_(g),
        rall(r) set of 94 regions;
i_(i) = no;


$if "%gravitydata%" == "gams"  phi(i,r) = phiest(i,r);



* -- SELECT WHICH SECTORS TO USE HERE :

* using all sectors for which we have estimates of PHI :
i_(i)$sum(r,phi(i,r))= yes;

* selecting regions :
rall(r)$sum(i,phi(i,r)) = yes;

* use this parameter in excel to get this set of regions :
set rskm(r) regions with skill ratio ensowments within the 2 middle quartiles
   /MYS, VNM, GEO,KAZ, EGY, ALB, PAK, NGA, SEN, THA, PHL, TUR, ZWE, LTU, COL, LVA, SVK, KOR, TUN, ARG, BOL, HRV, HUN, VEN, MLT, RUS, CZE, SVN, CHL, UKR, MUS, CRI, PAN, IRN, EST, POL,
  NZL, CYP, URY, BRA, ZAF, SGP, CAN, BWA, BGR/ ;

* other sets :

set red(r) / ALB, ARM, AZE, BGD, BGR, BLR, BOL, CHN, COL, ECU, EGY, GEO, GTM, IDN, IND, KAZ, KGZ, LKA, MAR, MYS, NGA, NIC, PAK, PER, PHL, PRY, ROU, SEN, THA, TUR, UKR, VNM, ZMB, ZWE /;

set blue(r) / ARG, BRA, BWA, CHL, CRI, CYP, CZE, EST, HRV, HUN, IRN, KOR, LTU, LVA, MEX, MLT, MUS, NZL, PAN, POL, RUS, SVK, SVN, TUN, URY, VEN, ZAF /;

set green(r) / AUS, AUT, BEL, CAN, CHE, DEU, ESP, FIN, FRA, GBR, GRC, HKG, IRL, ITA, JPN, NLD, PRT, SGP, SWE, TWN /;



set nored(r) / ARG, AUS, AUT, BEL, BRA, BWA, CAN, CHE, CHL, CRI, CYP, CZE, DEU, DNK, ESP, EST, ETH, FIN, FRA, GBR,
GRC, HKG, HRV, HUN, IRL, IRN, ITA, JPN, KHM, KOR, LAO, LTU, LUX, LVA, MDG, MEX, MLT,  MOZ, MUS, MWI, NLD, NOR,
NZL, PAN, POL, PRT, RUS, SGP, SVK, SVN, SWE, TUN, TWN, TZA, UGA, URY, USA, VEN, ZAF /;


set noblue(r) / ALB, ARM, AUS, AUT, AZE, BEL, BGD, BGR, BLR, BOL, CAN, CHE, CHN, COL, DEU, DNK, ECU, EGY, ESP, ETH,
FIN, FRA, GBR, GEO, GRC, GTM, HKG, IDN, IND, IRL, ITA, JPN, KAZ, KGZ, KHM, LAO, LKA, LUX, MAR, MDG,  MOZ, MWI,
MYS, NGA, NIC, NLD, NOR, PAK, PER, PHL, PRT, PRY, ROU, SEN, SGP, SWE, THA, TUR, TWN, TZA, UGA, UKR, USA, VNM, ZMB, ZWE /;


set nogreen(r) / ALB, ARG, ARM, AZE, BGD, BGR, BLR, BOL, BRA, BWA, CHL, CHN, COL, CRI, CYP, CZE, DNK, ECU, EGY, EST, ETH,
GEO, GTM, HRV, HUN, IDN, IND, IRN, KAZ, KGZ, KHM, KOR, LAO, LKA, LTU, LUX, LVA, MAR, MDG, MEX, MLT,  MOZ, MUS, MWI,
MYS, NGA, NIC, NOR, NZL, PAK, PAN, PER, PHL, POL, PRY, ROU, RUS, SEN, SVK, SVN, THA, TUN, TUR, TZA, UGA, UKR, URY, USA,
VEN, VNM, ZAF, ZMB, ZWE /;



* trick to have r_ defined as "base" set - need that for bootstrapping, cant use ORD on an unstable set
execute_unload 'setr.gdx', rall,  red, blue, green, rskm;

$gdxin setr

* IF USING RED BLUE GREEN SETS, USE THIS PART OF THE CODE :
$if "%regsubset%"=="red" $load r_=red
$if "%regsubset%"=="blue" $load r_=blue
$if "%regsubset%"=="green" $load r_=green
$if "%regsubset%"=="rall" $load r_=rall
$if "%regsubset%"=="riq" $load r_=riq
$if "%regsubset%"=="rskm" $load r_=rskm


display r_;

* dropping intermediates YES NEED TO DO THIS
i_(intermediates) = no;

i_("coa") = yes;
i_("gas") = yes;

i_("p_c") = yes;
i_("ely") = yes;

i_("obs") = yes;
i_("ofi") = yes;
i_("trd") = yes;

* DROPPING DWELLINGS :
i_("dwe") = no;


display i,i_, intermediates;
alias(i_,j_);
alias(r_,s_);

* define new g set
g_(i_) = yes; g_("c") = yes; g_("g") = yes; g_("i") = yes;


parameter nbr, nbi;
nbr = card(r_);
nbi = card(i_);

display nbr, nbi;

display r_, i_;


* definine per-capita expenditure only on selected sectors:
w(r)$pop(r) = 10e8* sum(i_,fd(i_,r)) /pop(r);
x(i,r)$pop(r)  = 10e8 * fd(i,r) / pop(r);
indexp(i)  = sum(r_,fd(i,r_)) / sum((r_,i.local),fd(i,r_));

* ALT WEIGHING SCHEMES, CHOSE HERE:
*	 maximum exp share
indexp(i)  = smax(r_, x(i,r_)/w(r_));
logphi(i,r)$phi(i,r) =log(phi(i,r));

* ignoring observations with very low shares of coal and gas

parameter expshare, sectdrop;
expshare(i_,r_) = fd(i_,r_) / sum(i_.local,fd(i_,r_));
sectdrop(i_,r_) = 1;

* looking at histogram, this seems to be a good cut-off
* drops 48 of 109 countries
sectdrop(i_,r_)$(expshare(i_,r_) < 0.0001 and sameas(i_,"gas")) =0 ;

* 0.0001: drops 53 ; 0.00001, drops 37
sectdrop(i_,r_)$(expshare(i_,r_) < 0.0001 and sameas(i_,"coa")) =0 ;

* drop in reporting only:
sectdrop(i_,r_) = 1;
sectdrop(i_,r_)$(NOT fd(i_,r_)) =0 ;



* compute some statistics
parameter       fittedPCexp     fitted per capita expenditure
                fittedexp       fitted expenditure
*                fittedexpshare  fitted expenditure share
                sstot total sum of squares
                rsquared
                nobs number of observations
                Fstat F-test statistic
                nbp number of parameters in model
                df degrees of freedom for Fstat
                sigma2hat estimated variance of regression
                modelselection;


* ----------------------------------------------
* for bootstrapping :

set boot /1*%itr%/;
option seed=081567;

parameter  wt(r)        weight in the objective
           bootcoef(boot,*,*)
           dim          number of ctries
           rdraw        random draw from the pool of ctries
           cardzz(r)    index on each observation
           wtchk        weight check
;
wt(r_)=1;

alias (r_,k,kk,z,zz);
*cardzz(k,kk)=(ord(k)-1)*card(k) + ord(kk);
* PROBLEM CANNOT USE ORD ON AN "UNSTABLE SET", see correction "trick" above
cardzz(k) = ord(k);
dim=card(k);
display dim, cardzz;


* which country-sector pairs are missing
set missingx(i,r);

missingx(i_,r_) = yes;
missingx(i_,r_)$x(i_,r_) = no;

display missingx;

parameter fe_norm(i);

fe_norm(i) = 10*indexp(i);
fe_norm("pdr") = indexp("pdr") / 100;

* ---------------------------------------------------------
* -- DEFINE MODEL :

variables sse_;
Positive variables       U_(r), theta_, delta_;
variables rho_, nu_, x_, b_  ;
VARIABLES mu_(i), sigma_, epsilondiff_(i), fe_(i) the LOGGED fixed effect, commonrho_  ;

equations  obj_log, obj_consshare, obj_logweighted, budget, commontheta,  epsilon_fact, commondelta, est_theta, sigmadiff, positive_g_cstr, u_cstr, 
commonrho, obj_logweighted_h, budget_H, positive_g_cstr_H;

set lowincome(r);

lowincome(r_)$(log(w(r_)) < 8.65) = 1;

set	se(i)	secondary energy        /coa,ely,gas, p_c/  ;

* -----------------------
* CLM ESTIMATION


* this is the CLM objective function
obj_logweighted.. sse_ =e=      sum((i_,r_)$x(i_,r_),
 sectdrop(i_,r_)*
                indexp(i_) *
                 wt(r_) * sqr(log(x(i_,r_)) - (
 sigma_ * log(w(r_)) +
 mu_(i_)*logphi(i_,r_)  
 + (1-sigma_) *
 (  fe_(i_)*fe_norm(i_) + rho_(i_)   * log(U_(r_) + b_(i_) ))
)));


* NOTE, IN THIS SPECICITATION, CANNOT PUT (1-SIGMA) IN G
* this leads to different results for homoth, however
* SOLUTION was to modify the h code to look like the other specs

obj_logweighted_H.. sse_ =e=      sum((i_,r_)$x(i_,r_),
 sectdrop(i_,r_)*
                indexp(i_) *
                 wt(r_) * sqr(log(x(i_,r_)) - (

 sigma_ * log(w(r_)) +
 mu_(i_)*logphi(i_,r_)  
 +
 (  fe_(i_)*fe_norm(i_) + rho_(i_)   * log(U_(r_) + b_(i_) ))
)
));


* constraint on mu
commontheta(i_) ..      mu_(i_)  =e= ( sigma_ -1) / theta_;

* this doesn't seem to work well
commonrho(i_) ..      rho_(i_)  =e= rho_("obs") ;

* this plus constraint that rho is positive (done below)
positive_g_cstr(i_,R_) ..  U_(R_) + b_(i_)  =g= 0 ;

* FOR homoth
positive_g_cstr_H(i_,R_) ..  ( rho_(i_) ) / (1-sigma_)  =g= 0 ;


* in logs (should be same but just testing)
budget(r_) ..   sum(i_, exp( 
 sigma_ * log(w(r_)) +
 mu_(i_)*logphi(i_,r_)  
 +
 (1-sigma_) *
 (  fe_(i_)*fe_norm(i_) + rho_(i_)  * log(U_(r_) + b_(i_))  )


)) =e= w(r_);


budget_H(r_) ..   sum(i_, exp( 
 sigma_ * log(w(r_)) +
 mu_(i_)*logphi(i_,r_)  
 +
* (1-sigma_) *
 (  fe_(i_)*fe_norm(i_) + rho_(i_)  * log(U_(r_) + b_(i_))  )
)) =e= w(r_);


* initialize variables:
sse_.L = 0;

epsilondiff_.L(i_) = 0.25;
*epsilon_.LO(i_) = 1E-9;
epsilondiff_.UP(i_) = 1e3;

* fix one epsilon_ to one (textiles: one that is not a service industry) :
* solution is indep of this choice (if none of the bounds are binding)
*epsilon_.FX("tex") =0.6;


U_.L(r_)= 1;
U_.LO(r_)= 1E-1;
U_.UP(r_)= 1E2;

nu_.FX(i_) = 0;


rho_.L(i_) =1;
rho_.UP(i_) =1E2;
rho_.LO(i_) = 0;

b_.L(i_) = 0;
b_.UP(i_) =10e3;
b_.LO(i_) =-1e3;



fe_.L(i_) = 0.5;
* !! results could sensitive to this number if binding, check expost
fe_.LO(i_) = -1E5;
fe_.UP(i_) = 1E1;

mu_.UP(i_) =  1e2;
mu_.LO(i_) =  -1e3;

sigma_.L = 0.7;
sigma_.UP = 1e3;
sigma_.LO = 1E-5;

* IMPOSE SIGMA > 1 and check whether the fit is better.
* HAVE TO CHANGE BOTH above and below 1 to check for best fit
Sigma_.L = 1.6;
sigma_.LO = 1.1;

x_.LO(i_,R_) = 1e-6;
x_.UP(i_,R_) = 1e4;

theta_.L = 1;
theta_.UP = 30;

* should be secondary energy only
set ene(i) / coa, gas, ely, p_c /;

parameter forstata;

forstata("fd",i_,r_) = x(i_,r_);
forstata("fd","all ene",r_) = sum(ene, x(ene,r_));

forstata("logpcfd",i_,r_)$x(i_,r_) = log(x(i_,r_));
forstata("logpcfd","all ene",r_) = log(forstata("fd","all ene",r_));

forstata("logPHI",i_,r_) = logphi(i_,r_);
forstata("logpci",i_,r_) = log(w(r_));
forstata("logpci","all ene",r_) = log(w(r_));

* this is total expenditure for the country (for potential weighting)
forstata("total exp",i_,r_) = expenditure(r_);
forstata("total exp","all ene",r_) = expenditure(r_);

parameter  lambda, mu,  sse, fe, theta, epsilon_scale, eta, avgmu;
theta=0;
epsilon_scale=0;
parameter specificationstats for reporting;

* -- define specification :

$if "%spec%"=="tc" model nlls /obj_%objective%, budget, positive_g_cstr /; 
$if "%spec%"=="tc" nbp("non-homoth")= card(r_) + 4*card(i_) + 1;
$if "%spec%"=="theta4" theta_.FX = 4;  model nlls /obj_%objective%, commontheta , budget, positive_g_cstr /; 
$if "%spec%"=="theta4" nbp("non-homoth")= card(r_) + 3*card(i_) + 1 ;
$if "%spec%"=="theta4_noenergy" rho_.FX(ene)=-1; theta_.FX = 4; eta_.FX(i_) = 0; model nlls /obj_%objective%, commontheta , budget, positive_g_cstr /; 


* -----------------------

nlls.reslim = 100000;

* -- Solve model :

* make a savepoint for boostraping
nlls.savepoint=1;
solve nlls using nlp minimizing sse_;
nlls.savepoint=0;


$if "%spec%"=="theta4_noenergy" display sse_.l; 
$if "%spec%"=="theta4_noenergy" $exit


* calculate statistics
coeffs("Theta","coeff", i_) = theta_.L;
coeffs("Epsilon","coeff", i_) = sigma_.L + epsilondiff_.L(i_);
coeffs("Epsilondiff","coeff", i_) = epsilondiff_.L(i_);
coeffs("Sigma","coeff", i_) = sigma_.L;
coeffs("fixed effect","coeff", i_) = fe_.L(i_);

coeffs("rho","coeff", i_) = rho_.L(i_);
coeffs("b","coeff", i_) = b_.L(i_);
coeffs("mu","coeff", i_) = mu_.L(i_);

parameter U;
U(r_) = U_.L(r_);

forstata("fitted nh",i_,r_) = 
exp( 
 sigma_.L * log(w(r_)) +
 mu_.L(i_)*logphi(i_,r_)  
 +
 (1-sigma_.L)
 * (  fe_.L(i_)*fe_norm(i_) + rho_.L(i_)  * log(U_.L(r_) + b_.L(i_)) )
);


* drop gas and coal if not in the original data
forstata("fitted nh",i_,r_)$(not sectdrop(i_,r_)) = 0; 

* elast of g to U
forstata("elast of g to U",i_,r_)$x(i_,r_) =  rho_.L(i_) * U_.L(r_) / (b_.L(i_) + U_.L(r_));

* note: estimated rho and nu include the (1-sigma) term
forstata("incel",i_,r_)$x(i_,r_) = sigma_.L + 
  (1 - sigma_.L) * 
forstata("elast of g to U",i_,r_) * sum(i_.local, x(i_,r_)) /
   sum(i_.local, forstata("elast of g to U",i_,r_)  * x(i_,r_));


forstata("incel fitted dem",i_,r_)$x(i_,r_) = sigma_.L + 
  (1 - sigma_.L) * 
forstata("elast of g to U",i_,r_) * sum(i_.local, forstata("fitted nh",i_,r_)) /
   sum(i_.local, forstata("elast of g to U",i_,r_)  *forstata("fitted nh",i_,r_) );


parameter incelast, avgincelast;

incelast("DIRECT",i_,r_) = forstata("incel",i_,r_);

incelast("DIRECT fitted dem",i_,r_) = forstata("incel fitted dem",i_,r_);

* weighted by final deman
avgincelast("direct","energy goods","all") = sum((r_,se), fd(se,r_) * IncElast("direct",se,r_)) /
	sum((r_,se), fd(se,r_) ) ;

avgincelast("direct","energy goods","lowincome") = sum((lowincome,se), fd(se,lowincome) * IncElast("direct",se,lowincome)) /
	sum((lowincome,se), fd(se,lowincome) ) ;


parameter fittedexp; 

fittedexp("non-homoth",i_,r_) = forstata("fitted nh",i_,r_) / 10e8* pop(r_)  ;


**  
* FOR REPORTING, CHOSE WHETHER TO USE WEIGHTED OR UNWEIGHTED R-SQUARED
parameter indexp_report;
*indexp_report(i_) = 1;
* if using  weighted:
indexp_report(i_) = indexp(i_);

sse_.L   = sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - log(forstata("fitted nh",i_,r_) )));

nobs = card(i_) * card(r_);

* calculate the average elasticity of expenditures w.r.t phi :
* weighted by sectors size
avgmu("weighted") = sum(i_, indexp_report(i_) * mu_.L(i_)) / sum(i_, indexp_report(i_));
avgmu("weighted - energy goods") = sum(se, indexp_report(se) * mu_.L(se)) / sum(se, indexp_report(se));


avgmu("unweighted") = sum(i_,   mu_.L(i_)) / card(i_);


* calculate weighted Rsquare for logweighted specification
parameter weightedsstot, weightedrsquared;

weightedrsquared = 0;
weightedsstot = 0;

sse("non-homoth") = sse_.L;

sstot = sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - sum((i_.local,r_.local)$x(i_,r_), log(x(i_,r_)))/nobs  ));


rsquared("nh","all","total") = 1 - sse_.L / sstot;

* by sector
rsquared("nh","all",i_) = 1 -   sum((r_)$(x(i_,r_) and sectdrop(i_,r_)), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,r_)) -  log(x(i_,r_))  ))
 / 
  sum((r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(x(i_,r_)) - sum((r_.local)$x(i_,r_), log(x(i_,r_)))/card(r_)  ))
;


* by sector
rsquared("nh","lowincome",i_) = 1 -   sum((lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) *  sqr(log(x(i_,lowincome)) - sum((lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;

* by sector
rsquared("nh","lowincome","total") = 1 -   sum((i_,lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((i_,lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) *  sqr(log(x(i_,lowincome)) - sum((i_.local,lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;


* energy goods only:
rsquared("nh","all","energy goods") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) *  sqr(log(forstata("fitted nh",se,r_)) -  log(x(se,r_))  ))
 / 
  sum((se,r_)$x(se,r_), sectdrop(SE,r_) *  indexp_report(SE) *  sqr(log(x(se,r_)) - sum((se.local,r_.local)$x(se,r_), log(x(se,r_)))/ (card(se) * card(r_))  ))
;


modelselection("aic weighted","non-homoth") =  log(sse_.L /nobs) + 2* nbp("non-homoth") / nobs ;

* Schwarz = BIC

* in paper use this measure
modelselection("bic weighted","non-homoth") =  log(sse_.L /nobs) + log(nobs)* nbp("non-homoth") /nobs ;


* only for export -- will not be used
parameter lambda, sigma_scale;

lambda("non-homoth", r_) = U_.L(r_) ;
sigma_scale("non-homoth") = 0 ;
coeffs("backed out theta","coeff",i_) = 1;



* now estimating Homothetic version = CES..

* here, can either fix epsilon-sigma, or fix U
* decide to keep U fixed, and just impose all epsilon to be the same (rho in the model) (to keep U's comparable in level)

* note: rho is epsilon -sigma

* -- HOMOTHETIC VERSION

* re-ininitialize variables

U_.L(r_) = 10;
U_.UP(r_) = 1000;
U_.LO(r_) = 0.0001;
U_.FX("USA") = 10;


fe_.L(i_) = 0;

rho_.L(i_) = 1;

rho_.UP(i_) = 10;
rho_.LO(i_) = -100;

rho_.L(i_) = -10;


sigma_.lo =1.1;

sigma_.L = 1.3;

nu_.FX(i_) = 0;

b_.FX(i_) = 0;


*re-solve with sigmas fixed at one
*theta_.UP = 1E10;
* with no constraint on mu :
$if "%spec%"=="tc"   model nllshomoth /obj_%objective%,  budget, commonrho /; 
$if "%spec%"=="tc" nbp("homoth")= card(r_) + 2*card(i_) + 1 ;


$if "%spec%"=="theta4" theta_.FX = 4;  model nllshomoth /obj_%objective%_H, commontheta , budget_H, commonrho, positive_g_cstr_H /; 
$if "%spec%"=="theta4" nbp("homoth")= card(r_) + card(i_) + 1;

solve nllshomoth using nlp minimizing sse_;
* re-set to non-homoth for bootstrap


forstata("fitted h",i_,r_) = 
exp( 
 sigma_.L * log(w(r_)) +
 mu_.L(i_)*logphi(i_,r_)  
 +
* (1 - sigma_.L) *
  (  fe_.L(i_)*fe_norm(i_) + rho_.L(i_)  * log(U_.L(r_) + b_.L(i_)) )
);


* drop gas and coal if not in the original data
forstata("fitted h",i_,r_)$(not sectdrop(i_,r_)) = 0; 
fittedexp("homoth",i_,r_) = forstata("fitted h",i_,r_) / 10e8* pop(r_)  ;


*** for unweighted R2
sse_.L =  sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - log(forstata("fitted h",i_,r_) )));


modelselection("aic weighted","homoth") = log(sse_.L /nobs) + 2* nbp("homoth") / nobs;
modelselection("bic weighted","homoth") =  log(sse_.L /nobs) + log(nobs)* nbp("homoth") /nobs ;


* Rsquared
rsquared("h","all","total") = 1 - sse_.L / sstot;

* within sector
rsquared("h","all",i_) = 1 -   sum((r_)$(x(i_,r_) and sectdrop(i_,r_)), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,r_)) -  log(x(i_,r_))  ))
 / 
  sum((r_)$x(i_,r_),sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - sum((r_.local)$x(i_,r_), log(x(i_,r_)))/card(r_)  ))
;

* by sector
rsquared("H","lowincome",i_) = 1 -   sum((lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(x(i_,lowincome)) - sum((lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;

* by sector
rsquared("h","lowincome","total") = 1 -   sum((i_,lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((i_,lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(x(i_,lowincome)) - sum((i_.local,lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;


* energy goods only:
rsquared("h","all","energy goods") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted h",se,r_)) -  log(x(se,r_))  ))
 / 
  sum((se,r_)$x(se,r_), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(x(se,r_)) - sum((se.local,r_.local)$x(se,r_), log(x(se,r_)))/ (card(se) * card(r_))  ))
;


* compute partial Rsquared for NH version
sse("homoth") = sse_.L;

Parameter withinrsquare;

withinrsquare("all sectors","to homoth") = 1 - (sse("non-homoth") / sse("homoth"));

withinrsquare("energy goods","to homoth") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted nh",se,r_)) -  log(x(se,r_))  ))
 /  sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted h",se,r_)) -  log(x(se,r_))  ))
;




* F-TESTS

*  this is restriction of all NH terms:
*fstat("sigma=1") = ((sse("homoth") - sse("non-homoth")) / (nbp("non-homoth") - nbp("homoth"))) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;

*  Significance of the rho's
fstat("rho=1") = ((sse("homoth") - sse("non-homoth")) / (nbp("non-homoth") - nbp("homoth"))) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;

* Signficicance of the NU's
* REQUIRES MANUAL IMPUTING OF SSE FROM THE DEMAND_CLM_T4.GMS FILE
**********
*SSE("MU=0") =   	49.4690958842588;
SSE("MU=0") =   702.749944433499;

nbp("MU=0") = nbp("non-homoth") - card(i_);


fstat("mu=1") = ((sse("MU=0") - sse("non-homoth")) / (nbp("non-homoth") - nbp("MU=0"))) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;

* will have an F distribution, with (p2-p1, n-p2) = 1.23

* for energy goods only: compute model with sigma = 1 for energy, get SSE and paste it here:
* run tc_noenergy specification
fstat("sigma=1 energy true") = ((708   - sse("non-homoth")) / (4)) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;
* so we need to compare to F(4,5002) .. at 5% significance level, that is about 1.9
* according to http://www.socscistatistics.com/pvalues/fdistribution.aspx  highly signif <0.00001

* alt computation because "true" doesn't work in the quadratic version
set nonene(i);
nonene(i_) = yes;
nonene(ene) = no;


parameter sse_noenergy;
sse_noenergy   = sum((ene,r_)$x(ene,r_), sectdrop(ene,r_) *  indexp_report(ene) *  sqr(log(x(ene,r_)) - log(forstata("fitted h",ene,r_) ))) +
sum((nonene,r_)$x(nonene,r_), sectdrop(nonene,r_) *  indexp_report(nonene) *  sqr(log(x(nonene,r_)) - log(forstata("fitted nh",nonene,r_) )));

* approx
fstat("sigma=1 energy") = ((sse_noenergy   - sse("non-homoth")) / (8)) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;

* fstat of the quadratic term
* this is complicated: in CLM file, compute SSEenergyonly and copy it here
parameter donald;
donald= 42.5 +
sum((nonene,r_)$x(nonene,r_), sectdrop(nonene,r_) *  indexp_report(nonene) *  sqr(log(x(nonene,r_)) - log(forstata("fitted nh",nonene,r_) )));

fstat("mu=1 energy") = ((donald - sse("non-homoth")) / (4  )) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;


* FOR HOMONOTC USING THE CRIE ESTIMATE! SHOULD BE THE SAME..
withinrsquare("to homoth notc","all") = 1 - (sse("non-homoth") /
964
);



specificationstats("sigma") = sigma_.L;
specificationstats("avg incel energy goods") = avgincelast("direct","energy goods","all"); 
specificationstats("avg incel energy goods - low income ctries") = avgincelast("direct","energy goods","lowincome") ;
specificationstats("avgmu - weighted") = avgmu("weighted");
specificationstats("avgmu - weighted - energy goods") = avgmu("weighted - energy goods");
specificationstats("F-stat rho=1") = fstat("rho=1") ;
specificationstats("F-stat mu=1") = fstat("mu=1") ;

specificationstats("R2") = rsquared("nh","all","total");
specificationstats("R2 energy goods") = rsquared("nh","all","energy goods");

specificationstats("within R2") = withinrsquare("all sectors","to homoth") ;
specificationstats("within R2 energy goods") = withinrsquare("energy goods","to homoth");

specificationstats("aic weighted") = modelselection("aic weighted","non-homoth");
specificationstats("bic weighted") = modelselection("bic weighted","non-homoth");

specificationstats("p") = nbp("non-homoth");
specificationstats("n") = nobs;
specificationstats("sse unweighted") = sse("non-homoth");
specificationstats("sstot") = sstot;
specificationstats("avgmu - unweighted") = avgmu("unweighted") ;

specificationstats("bic unweighted") = modelselection("bic unweighted","non-homoth");
specificationstats("theta") = theta;

execute_unload 'estimates\estimates_%ds%_%objective%_%spec%_CLM_flex_shifter.gdx'     coeffs,U, sigma_, epsilondiff_, forstata, weightedrsquared, b_, rho_, nu_, fe_, mu_, ex, im, cst, lambda, sigma_scale, fittedexp, sectdrop, expshare, rsquared, incelast, withinrsquare, sse, modelselection, nbp, specificationstats, fstat;

